Setup

Load R libraries

library(data.table)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(limma)
library(biomaRt)
library(fgsea)
library(goseq)

theme_set(theme_classic())

cell_type_name = params$cell_type_name
graph_weight = params$graph_weight

cell_type_name
## [1] "cd4"
graph_weight
## [1] "1.0"

Check enrichment of gene sets

Read in gene info and gene set assignments

file_tag = sprintf("%s_BL_%s", cell_type_name, graph_weight)

assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag), 
                     what = character(), sep="\n")

gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag), 
                 what = character(), sep="\n")

gene_sets = sapply(gene_sets, strsplit, USE.NAMES=FALSE, split=",")
n_genes   = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.00   17.00   18.00   17.82   19.00   21.00
length(n_genes)
## [1] 40
sort(n_genes)
##  [1] 13 14 15 16 17 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18 18 18 18
## [26] 18 18 19 19 19 19 19 19 19 19 19 20 20 20 21

Find gene symbols

Find gene symbols from bioMart.

All the gene symbols that can be found in bioMart are consistent with what we have. So no need to run it.

ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"), 
                filters = "external_gene_name", 
                values = assayed_genes, 
                mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]

table(assayed_genes %in% gene_BM$external_gene_name)

t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]

table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]

Find gene symbols using the alias2Symbol function from limma.

a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  a2s[i] = ai[1]
}
## [1] "HIST1H2BC"
## [1] "H2BC5" "H2BC4"
## [1] "MPP6"
## [1] "MPHOSPH6" "PALS2"   
## [1] "MARS"
## [1] "MARS1" "SLA2" 
## [1] "SEPT2"
## [1] "SEPTIN6" "SEPTIN2"
table(is.na(a2s))
## 
## FALSE  TRUE 
##  1951    49
table(a2s == assayed_genes, useNA = 'ifany')
## 
## FALSE  TRUE  <NA> 
##    45  1906    49
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)

gene_info[sym_in_data != sym_limma,]
##      sym_in_data   sym_limma
##  1:      ADPRHL2       ADPRS
##  2:          AES        TLE5
##  3:     C12orf45    NOPCHAP1
##  4:      C3orf58      DIPK2A
##  5:      C6orf99   LINC02901
##  6:        CBWD2       ZNG1B
##  7:      CXorf57        RADX
##  8:      FAM102A       EEIG1
##  9:      FAM122C      PABIR3
## 10:      FAM153C    FAM153CP
## 11:     FAM160A2      FHIP1B
## 12:        GRASP     TAMALIN
## 13:        H2AFX        H2AX
## 14:    HIST1H2AG      H2AC11
## 15:    HIST1H2BC       H2BC5
## 16:    HIST1H2BK      H2BC12
## 17:    HIST1H2BN      H2BC15
## 18:     HIST1H3A        H3C1
## 19:     HIST1H3H       H3C10
## 20:     HIST1H4C        H4C3
## 21:    HIST2H2BF      H2BC18
## 22:         LRMP       IRAG2
## 23:      MFSD14C    MFSD14CP
## 24:         MKL1       MRTFA
## 25:         MPP6    MPHOSPH6
## 26:  RNASEH1-AS1  RNASEH1-DT
## 27:        SEPT6     SEPTIN6
## 28:        SEPT9     SEPTIN9
## 29: TMEM161B-AS1 TMEM161B-DT
## 30:        ARNTL       BMAL1
## 31:     C6orf106       ILRUN
## 32:     C6orf203      MTRES1
## 33:      FAM129A      NIBAN1
## 34:     FAM160B1      FHIP2A
## 35:      FAM192A    PSME3IP1
## 36:        HEXDC        HEXD
## 37:     HIST1H1E        H1-4
## 38:     KIAA0100       BLTP2
## 39:     KIAA1551       RESF1
## 40:         LARS       LARS1
## 41:         MARS       MARS1
## 42:      PLA2G16      PLAAT3
## 43:        SEPT2     SEPTIN6
## 44:       SMIM37        MTLN
## 45:         YARS       YARS1
##      sym_in_data   sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma), gene_symbol := sym_limma]

dim(gene_info)
## [1] 2000    3
gene_info[1:5,]
##    sym_in_data sym_limma gene_symbol
## 1:       ABCD3     ABCD3       ABCD3
## 2:       ABCG1     ABCG1       ABCG1
## 3:       ABHD5     ABHD5       ABHD5
## 4:        ABI1      ABI1        ABI1
## 5:      ABLIM1    ABLIM1      ABLIM1
t1 = table(gene_info$gene_symbol)
table(t1)
## t1
##    1    2 
## 1998    1
gene_info[gene_symbol %in% names(t1)[t1 == 2],]
##    sym_in_data sym_limma gene_symbol
## 1:       SEPT6   SEPTIN6     SEPTIN6
## 2:       SEPT2   SEPTIN6     SEPTIN6
gene_info[sym_in_data == "HIST1H2BC", gene_symbol:="H2BC4"]
gene_info[sym_in_data == "SEPT6", gene_symbol:="SEPTIN6"]
gene_info[sym_in_data == "SEPT2", gene_symbol:="SEPTIN2"]

Prepare gene set information

Gene set annotations (by gene symbols) were downloaded from MSigDB website.

gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]]    = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"
gmtfile[["immune"]]   = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"

pathways = list()
for(k1 in names(gmtfile)){
  pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}

names(pathways)
## [1] "reactome" "go_bp"    "immune"
sapply(pathways, length)
## reactome    go_bp   immune 
##     1692     7647     5219

Filter gene sets for size between 10 and 500.

lapply(pathways, function(v){
  quantile(sapply(v, length), probs = seq(0, 1, 0.1), na.rm = TRUE)
})
## $reactome
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    7.0    9.0   12.0   17.0   23.0   31.0   44.0   71.8  120.9 1463.0 
## 
## $go_bp
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    6.0    8.0   10.0   14.0   19.0   29.0   46.0   80.8  183.0 1966.0 
## 
## $immune
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    5  162  193  197  199  199  200  200  200  200 1992
for(k1 in names(pathways)){
  p1 = pathways[[k1]]
  pathways[[k1]] = p1[sapply(p1, length) %in% 10:500]
}

Conduct enrichment analysis

dim(gene_info)
## [1] 2000    3
max_n2kp = 10

goseq_res = NULL

for(k in 1:length(gene_sets)){
  if(length(gene_sets[[k]]) < 10) { next }
  
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])
  
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  table(genes)
  
  pwf = nullp(genes, "hg38", "geneSymbol")

  for(k1 in names(pathways)){
    p1 = pathways[[k1]]
    res1 = goseq(pwf, "hg38", "geneSymbol", 
                 gene2cat=goseq:::reversemapping(p1))
    res1$FDR  = p.adjust(res1$over_represented_pvalue, method="BH")
    
    nD = sum(res1$FDR < 0.1)
    
    if(nD > 0){
      res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=81)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
##  [1] "AC004854.2" "BX284668.6" "KCNQ1OT1"   "ARHGAP10"   "CARD11"    
##  [6] "CCL5"       "CCR4"       "FAM53B"     "IRF9"       "LTBP3"     
## [11] "MYO1F"      "NEAT1"      "PPRC1"      "SH3BP2"     "VCAN"      
## [16] "ZEB2"       "ZNF267"

## [1] 2
##  [1] "AC116407.2"  "ADGRG1"      "APOL1"       "BISPR"       "CCL4"       
##  [6] "CD300A"      "GPR132"      "GPR65"       "GRK2"        "LINC01871"  
## [11] "MIAT"        "MIR4435-2HG" "MYBL1"       "S1PR5"       "TSPAN32"    
## [16] "USP30-AS1"   "ZNF683"

## [1] 3
##  [1] "AC008105.3"   "AC025171.2"   "BOLA2-SMG1P6" "C12orf29"     "IGLV1-44"    
##  [6] "LINC01215"    "LRMP"         "MDS2"         "PLCL1"        "RETREG1"     
## [11] "RNASEH1-AS1"  "TRAV12-1"     "TRAV12-2"     "TRAV21"       "TRAV25"      
## [16] "TRAV38-2DV8"  "TRAV8-6"      "TRBV5-4"      "TRBV6-5"      "ZNF600"      
## [21] "ZNF749"

## [1] 4
##  [1] "AC004687.1" "GOLGA8B"    "GZMK"       "LST1"       "NUP58"     
##  [6] "PARP8"      "PLEKHM1"    "RABL2B"     "RGS1"       "TECPR1"    
## [11] "ZNF10"      "ABHD3"      "NKG7"       "NRDC"       "PPP2R3C"   
## [16] "SPON2"      "TGFBR3"

## [1] 5
##  [1] "ABCG1"   "CD28"    "DSE"     "ERCC5"   "LRRC8D"  "MAP3K8"  "MZF1"   
##  [8] "PLK3"    "EFR3A"   "FAM129A" "GMFB"    "GNPTAB"  "ISG20"   "MYO1G"  
## [15] "PTGER2"  "RNF19A"  "SYTL1"   "TGS1"    "TRIB2"

## [1] 6
##  [1] "AC145124.1" "AP002360.1" "DNASE1"     "FAM122C"    "FBXO8"     
##  [6] "GIMAP6"     "JAML"       "KCNK6"      "LRRN3"      "NUAK2"     
## [11] "RPS26"      "SLC25A38"   "TRAV23DV6"  "TRAV41"     "XIST"      
## [16] "ZNF140"     "ZNF84"

## [1] 7
##  [1] "GGT7"    "ZNF490"  "ASCL2"   "BCL9L"   "CCDC112" "CD58"    "EHMT1"  
##  [8] "GALNS"   "GBP1"    "GBP3"    "GBP5"    "GZMH"    "HIVEP3"  "IFI35"  
## [15] "KLF9"    "RAB8B"   "SCAF8"   "TBX21"

## [1] 8
##  [1] "CD40LG"   "GIMAP1"   "IGKV1-5"  "IGKV3-20" "LAX1"     "MBNL2"   
##  [7] "NBPF14"   "NUDT4"    "PCMTD2"   "PDE7A"    "PYROXD1"  "SLC26A11"
## [13] "SPIDR"    "TUBD1"    "TUBE1"    "ZNF677"   "SUSD6"

## [1] 9
##  [1] "C16orf74" "FAM153C"  "GRASP"    "LYRM7"    "STARD10"  "STX16"   
##  [7] "TOB2"     "YPEL2"    "ZNF831"   "ZSCAN18"  "ABHD2"    "FAM160B1"
## [13] "FRY"      "FRYL"     "NBEAL2"   "PEX26"    "SUGP2"

## [1] 10
##  [1] "AC023157.3"  "AC025171.3"  "AL121944.1"  "AL135791.1"  "AL359220.1" 
##  [6] "AL627171.1"  "AL645728.1"  "C6orf99"     "LINC02273"   "NDUFV2-AS1" 
## [11] "NSMCE3"      "PRR7"        "AC016831.7"  "AKNA"        "AREG"       
## [16] "ATAD2B"      "CEMIP2"      "TENT5C"      "THUMPD3-AS1"

## [1] 11
##  [1] "IER3"     "C1orf21"  "CMIP"     "CROT"     "DIP2A"    "EIF2AK4" 
##  [7] "GABPB2"   "KLHDC4"   "NQO2"     "PLA2G16"  "PPP2R5B"  "PREX1"   
## [13] "PTGDR"    "RBSN"     "RNPEPL1"  "SACS"     "TNFRSF18" "ZNF236"

## [1] 12
##  [1] "AC245297.3" "MUC20-OT1"  "OTULINL"    "ZFP14"      "ADGRE5"    
##  [6] "CARD16"     "GIMAP7"     "GZMM"       "HACD3"      "KANSL1-AS1"
## [11] "KIAA0040"   "LY6E"       "MFSD14A"    "NBDY"       "NDUFC1"    
## [16] "RAP1GAP2"   "SLFN12L"    "TM2D1"      "TRANK1"     "TUT4"

## [1] 13
##  [1] "CRLF3"    "GIMAP8"   "HOXB2"    "IGLV2-14" "MAST4"    "ODF2L"   
##  [7] "SIMC1"    "THAP6"    "ZFX"      "DENND4B"  "FCRL6"    "IFI44"   
## [13] "IFI44L"   "MX2"      "OAS1"     "OAS2"     "PHF23"    "PRSS23"  
## [19] "RSAD2"

## [1] 14
##  [1] "ADPRHL2" "ANKH"    "MHENCR"  "NEU1"    "ZNF821"  "ABCC1"   "G2E3"   
##  [8] "HEXDC"   "MLLT10"  "MLLT6"   "MYOM2"   "PARP4"   "TIPARP"  "TSPAN14"
## [15] "TUBGCP6" "WDR43"   "ZNF708"

## [1] 15
##  [1] "AC007952.4" "AC119396.1" "AC245014.3" "AL138963.3" "ARHGAP15"  
##  [6] "ATP5MG"     "CITED2"     "FCMR"       "FYB1"       "LIMD2"     
## [11] "MTRNR2L12"  "NOP53"      "RACK1"      "SCML4"      "SNHG8"     
## [16] "TBC1D10C"   "TC2N"       "TNFRSF25"   "WAPL"

## [1] 16
##  [1] "KIF9"    "TRBV7-3" "ABCC10"  "AP3M2"   "C4orf48" "COL6A3"  "CRTC3"  
##  [8] "CTBS"    "CYTIP"   "GPRIN3"  "LAIR2"   "SBNO2"   "SDR39U1" "SLC9A8" 
## [15] "STK17B"  "TAF4B"   "TMEM156"

## [1] 17
##  [1] "AC015982.1" "AC016405.3" "AC083880.1" "AC091271.1" "AC103591.3"
##  [6] "AF213884.3" "AL357060.1" "AL451085.1" "ARF4-AS1"   "HIPK1-AS1" 
## [11] "ID3"        "LINC01465"  "MZF1-AS1"   "NOCT"       "NPIPB11"   
## [16] "NPIPB4"     "OSER1-DT"   "SDR42E2"    "THAP9-AS1"  "TRBV6-6"

## [1] 18
##  [1] "ENOSF1"   "FAM227B"  "NSUN6"    "TBCCD1"   "CFD"      "CRYBG1"  
##  [7] "ECPAS"    "ITK"      "LPIN1"    "LPIN2"    "NNT-AS1"  "ODF3B"   
## [13] "RTKN2"    "SAMD9L"   "TOMM70"   "TRBV6-1"  "TTC16"    "Z93930.2"
## [19] "ZBP1"

## [1] 19
##  [1] "ANK3"     "ATXN7L3B" "CCDC88B"  "CISH"     "ICE1"     "IFNGR2"  
##  [7] "KHNYN"    "NFKBIZ"   "P4HTM"    "PIEZO1"   "PRDM1"    "SESN3"   
## [13] "SLC20A2"  "TOB1"     "TSHZ2"    "UGCG"     "ZNF101"   "ZNF593"

## [1] 20
##  [1] "KLF10"   "SH3YL1"  "ATP8B2"  "CD69"    "CITED4"  "CRIP2"   "DDIT4"  
##  [8] "HELB"    "ICOS"    "KCNAB2"  "KIF21B"  "NPDC1"   "PKNOX1"  "PREP"   
## [15] "REXO2"   "SLC35A2" "TBC1D14" "ZDHHC20"

## [1] 21
##  [1] "AIF1"    "CCDC66"  "RASA2"   "SENP7"   "SIAH2"   "TBPL1"   "CHD6"   
##  [8] "ERICH1"  "GNLY"    "MT2A"    "NCOA7"   "NLRC5"   "PCED1B"  "RAPGEF1"
## [15] "STAT4"   "STK10"   "TNFRSF4" "XAF1"

## [1] 22
##  [1] "ARL6IP1" "CCDC141" "CENPK"   "CXorf57" "PHYH"    "SLC38A2" "TSC22D2"
##  [8] "WARS2"   "ADAM19"  "CREBZF"  "GNPAT"   "NARF"    "NFE2L1"  "SETD5"  
## [15] "SH3BP5"  "SLA"     "SMAP1"   "SNX1"    "XBP1"

## [1] 23
##  [1] "AC009061.2" "AC020911.2" "AC025164.1" "AC084033.3" "AC087239.1"
##  [6] "AL118516.1" "ATP2B1-AS1" "CERNA1"     "CSKMT"      "IL23A"     
## [11] "ILF3-DT"    "LYRM9"      "MATR3-1"    "Z93241.1"   "AC020915.3"
## [16] "SNHG9"

## [1] 24
##  [1] "SNHG7"      "AC022916.1" "CARD8-AS1"  "CTSW"       "CX3CR1"    
##  [6] "FGFBP2"     "IL21R"      "KIF21A"     "MCTP2"      "NAA38"     
## [11] "PAXX"       "PLEK"       "RASAL3"     "TRAV29DV5"  "TRBV12-3"

## [1] 25
##  [1] "CARHSP1" "CCDC18"  "CEP120"  "CEP95"   "CHIC2"   "COG5"    "LCLAT1" 
##  [8] "MAP3K2"  "NSMAF"   "PECAM1"  "POC5"    "SBF2"    "SNX18"   "FNDC3B" 
## [15] "GCA"     "LAG3"    "SBF1"

## [1] 26
##  [1] "JPX"     "APH1B"   "ARHGEF3" "DTX3L"   "ETNK1"   "IRAK4"   "NT5C"   
##  [8] "PARP14"  "PARP9"   "PDE4B"   "PLAC8"   "PRDM2"   "RCBTB2"  "RNF145" 
## [15] "RNMT"    "SEC14L1" "SP140L"  "ZNF292"

## [1] 27
##  [1] "ANKRD44" "C6orf62" "INPP4B"  "MAML2"   "PECR"    "PHC1"    "PLK2"   
##  [8] "TPP2"    "WASHC4"  "ADTRP"   "DOCK2"   "PCGF5"   "PYCARD"  "SOCS2"  
## [15] "TAOK3"   "VPS36"   "ZNF276"

## [1] 28
##  [1] "LRRC8C-DT" "FAAH2"     "GIMAP4"    "HRH2"      "IFITM2"    "LRRC58"   
##  [7] "MT1X"      "NORAD"     "PARP11"    "PCSK1N"    "PUM3"      "S100A12"  
## [13] "SLC25A37"  "SLF2"      "SMIM37"    "SPATA13"   "TCAF2"     "UBALD2"   
## [19] "UTP25"

## [1] 29
##  [1] "ATG9B"    "CFAP36"   "DIP2B"    "FBXO3"    "HELQ"     "IL6R"    
##  [7] "KLHL6"    "LTB"      "OSER1"    "PLCD1"    "RCAN3"    "SLC22A17"
## [13] "STK19"    "TBCC"     "TBCK"     "TGIF1"    "TTC39C"   "C12orf4"

## [1] 30
##  [1] "AL133415.1" "C1GALT1"    "DPYD"       "KLF7"       "LEPROT"    
##  [6] "LINC01550"  "POLD4"      "RAB33B"     "RAB37"      "SLC7A6"    
## [11] "SPART"      "TMEM204"    "TRABD2A"    "TRAV8-3"    "XRRA1"     
## [16] "ZMAT1"      "ZNF506"     "ANKAR"      "APOL6"      "GAB3"

## [1] 31
##  [1] "BTN3A1"     "C17orf49"   "CTSF"       "ERAP2"      "EVI2B"     
##  [6] "HIVEP2"     "IPCEF1"     "METAP1"     "MTO1"       "ORC4"      
## [11] "PITPNA-AS1" "TMEM154"    "TMEM245"    "WHAMM"      "YY1AP1"    
## [16] "BTN3A2"     "KLRG1"      "TMEM175"

## [1] 32
##  [1] "AC027644.3" "AC087623.3" "AC093323.1" "AC097376.2" "AK5"       
##  [6] "AL139246.5" "ARRDC2"     "CHRM3-AS2"  "COX10"      "ERVK3-1"   
## [11] "GPCPD1"     "LETM2"      "LINC00649"  "ST7L"       "TRAV13-2"  
## [16] "TRAV14DV4"  "TRAV9-2"    "TRBV28"     "FRMD4B"

## [1] 33
##  [1] "COA1"    "MBD6"    "NR4A3"   "AGAP2"   "AZI2"    "B3GALT4" "KIF3B"  
##  [8] "N4BP1"   "PDE12"   "PIP4K2B" "RECK"    "SIGIRR"  "SLC23A2" "TBC1D15"
## [15] "TMX3"    "TRMT10C" "UBA7"

## [1] 34
##  [1] "ATF4"    "BTG2"    "FBXL3"   "HEATR5B" "LDLRAP1" "MTERF4"  "OXLD1"  
##  [8] "BATF"    "DDX41"   "IFNAR1"  "LPCAT3"  "MAF"     "PVT1"    "SYNRG"

## [1] 35
##  [1] "ACADSB"   "ARID4A"   "ATAD1"    "DAPP1"    "DBP"      "FAM118A" 
##  [7] "HDHD2"    "ING2"     "INTS6"    "MXD1"     "NABP1"    "PPP1R15B"
## [13] "SGSM3"    "SLC25A33" "TIA1"     "UBL3"     "UCP2"     "CAST"    
## [19] "GZMB"

## [1] 36
##  [1] "ADA2"         "COQ10A"       "JCHAIN"       "LINC00623"    "LINC02265"   
##  [6] "MFSD14C"      "MLXIP"        "MMP28"        "NAA16"        "NPIPB5"      
## [11] "TMEM161B-AS1" "TMEM71"       "TRAV8-2"      "ZNF91"        "AC118549.1"  
## [16] "IGLV3-25"     "TMEM138"      "TTC38"

## [1] 37
##  [1] "ABHD5"      "AC013264.1" "CAMK4"      "GCH1"       "IFRD1"     
##  [6] "KIAA1328"   "KLRB1"      "MID1IP1"    "NRIP1"      "OTUD5"     
## [11] "PHLDA1"     "POU2F2"     "RGCC"       "SNRK"       "TLE4"      
## [16] "TOX"        "TSPYL4"     "ZFAS1"

## [1] 38
##  [1] "ATG13"    "C3orf58"  "CD38"     "FCGR3A"   "FCN1"     "HIST1H3H"
##  [7] "IER5"     "LIPT1"    "RIC1"     "TBC1D7"   "TMC8"     "TTC31"   
## [13] "ZBTB25"   "ZC3H12A"  "ZC3H12D"  "APOBEC3G" "GZMA"     "MATK"

## [1] 39
##  [1] "C20orf204" "CARMIL2"   "DDX3Y"     "EIF1AY"    "KDM5D"     "LY96"     
##  [7] "OSM"       "RNF157"    "RPS4Y1"    "SOCS3"     "TTTY15"    "UTY"      
## [13] "ZFYVE28"

## [1] 40
##  [1] "ANXA2R"  "ARL4A"   "COQ10B"  "COQ7"    "COQ8A"   "EFCAB2"  "EGR1"   
##  [8] "ODC1"    "SNHG12"  "TAGAP"   "TCP11L2" "TCTA"    "WSB1"    "APBB1IP"
## [15] "ARL4C"   "BUD23"   "CCDC43"  "UBE3B"

for(n1 in names(goseq_res)){
  k = as.numeric(gsub("set_", "", n1))
  print(n1)
  print(gene_sets[[k]])
  print(goseq_res[[n1]])

}
## [1] "set_2"
##  [1] "AC116407.2"  "ADGRG1"      "APOL1"       "BISPR"       "CCL4"       
##  [6] "CD300A"      "GPR132"      "GPR65"       "GRK2"        "LINC01871"  
## [11] "MIAT"        "MIR4435-2HG" "MYBL1"       "S1PR5"       "TSPAN32"    
## [16] "USP30-AS1"   "ZNF683"     
## $reactome
##                               category over_represented_pvalue
## 158 class a 1 rhodopsin like receptors            1.215063e-06
## 413                gpcr ligand binding            7.213076e-06
## 359        g alpha q signalling events            1.337138e-04
##     under_represented_pvalue numDEInCat numInCat         FDR
## 158                1.0000000          4       16 0.001459290
## 413                0.9999999          4       24 0.004331452
## 359                0.9999980          3       18 0.053530101
## 
## [1] "set_7"
##  [1] "GGT7"    "ZNF490"  "ASCL2"   "BCL9L"   "CCDC112" "CD58"    "EHMT1"  
##  [8] "GALNS"   "GBP1"    "GBP3"    "GBP5"    "GZMH"    "HIVEP3"  "IFI35"  
## [15] "KLF9"    "RAB8B"   "SCAF8"   "TBX21"  
## $go_bp
##                                                   category
## 693 disruption of anatomical structure in another organism
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 693            8.708209e-06                0.9999999          4       17
##            FDR
## 693 0.04165136
## 
## [1] "set_13"
##  [1] "CRLF3"    "GIMAP8"   "HOXB2"    "IGLV2-14" "MAST4"    "ODF2L"   
##  [7] "SIMC1"    "THAP6"    "ZFX"      "DENND4B"  "FCRL6"    "IFI44"   
## [13] "IFI44L"   "MX2"      "OAS1"     "OAS2"     "PHF23"    "PRSS23"  
## [19] "RSAD2"   
## $reactome
##                            category over_represented_pvalue
## 476 interferon alpha beta signaling            4.578903e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 476                 0.999999          4       38 0.05499263
## 
## $immune
##                                                                      category
## 4937                               hoek neutrophil 2011 2012 tiv adult 7dy dn
## 4935                               hoek neutrophil 2011 2012 tiv adult 1dy up
## 5062                            querec pbmc yf 17d vaccine age 18 45yo 3dy up
## 5063                            querec pbmc yf 17d vaccine age 18 45yo 7dy up
## 5102                     zak pbmc mrkad5 hiv 1 gag pol nef age 20 50yo 3dy up
## 2127              gse24081 controller vs progressor hiv specific cd8 tcell dn
## 4047                             gse42021 tconv pln vs cd24hi tconv thymus dn
## 13   erwin cohen blood vaccine tc 83 age 23 48yo vaccinated vs control 7dy up
## 54                            gaucher pbmc yf vax stamaril unknown age 3dy up
## 1289             gse17974 il4 and anti il12 vs untreated 24h act cd4 tcell dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4937            3.906394e-08                1.0000000          5       12
## 4935            3.942643e-08                1.0000000          5       12
## 5062            8.714928e-08                1.0000000          6       27
## 5063            8.714928e-08                1.0000000          6       27
## 5102            8.835708e-08                1.0000000          7       45
## 2127            1.594941e-07                1.0000000          7       48
## 4047            5.380862e-07                1.0000000          6       35
## 13              8.030557e-07                1.0000000          6       38
## 54              1.921617e-06                0.9999999          7       69
## 1289            2.555537e-06                0.9999999          6       45
##               FDR
## 4937 9.023025e-05
## 4935 9.023025e-05
## 5062 9.023025e-05
## 5063 9.023025e-05
## 5102 9.023025e-05
## 2127 1.357294e-04
## 4047 3.924954e-04
## 13   5.125503e-04
## 54   1.090197e-03
## 1289 1.255580e-03
## 
## [1] "set_18"
##  [1] "ENOSF1"   "FAM227B"  "NSUN6"    "TBCCD1"   "CFD"      "CRYBG1"  
##  [7] "ECPAS"    "ITK"      "LPIN1"    "LPIN2"    "NNT-AS1"  "ODF3B"   
## [13] "RTKN2"    "SAMD9L"   "TOMM70"   "TRBV6-1"  "TTC16"    "Z93930.2"
## [19] "ZBP1"    
## $reactome
##                                    category over_represented_pvalue
## 1170              triglyceride biosynthesis            2.644597e-05
## 1073                        synthesis of pe            8.478266e-05
## 230  depolymerization of the nuclear lamina            2.710994e-04
## 1172                triglyceride metabolism            2.980980e-04
##      under_represented_pvalue numDEInCat numInCat        FDR
## 1170                1.0000000          2        2 0.03176161
## 1073                0.9999999          2        3 0.05091199
## 230                 0.9999989          2        5 0.08950393
## 1172                0.9999987          2        5 0.08950393
## 
## [1] "set_25"
##  [1] "CARHSP1" "CCDC18"  "CEP120"  "CEP95"   "CHIC2"   "COG5"    "LCLAT1" 
##  [8] "MAP3K2"  "NSMAF"   "PECAM1"  "POC5"    "SBF2"    "SNX18"   "FNDC3B" 
## [15] "GCA"     "LAG3"    "SBF1"   
## $immune
##                                                                               category
## 5031 nakaya pbmc fluarix fluvirin age 18 50yo correlated with hai 28dy response at 7dy
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 5031            1.650058e-05                0.9999997          4       19
##             FDR
## 5031 0.08425197
## 
## [1] "set_31"
##  [1] "BTN3A1"     "C17orf49"   "CTSF"       "ERAP2"      "EVI2B"     
##  [6] "HIVEP2"     "IPCEF1"     "METAP1"     "MTO1"       "ORC4"      
## [11] "PITPNA-AS1" "TMEM154"    "TMEM245"    "WHAMM"      "YY1AP1"    
## [16] "BTN3A2"     "KLRG1"      "TMEM175"   
## $reactome
##                                 category over_represented_pvalue
## 110 butyrophilin btn family interactions            4.716898e-05
##     under_represented_pvalue numDEInCat numInCat        FDR
## 110                        1          2        2 0.05664994
## 
## [1] "set_39"
##  [1] "C20orf204" "CARMIL2"   "DDX3Y"     "EIF1AY"    "KDM5D"     "LY96"     
##  [7] "OSM"       "RNF157"    "RPS4Y1"    "SOCS3"     "TTTY15"    "UTY"      
## [13] "ZFYVE28"  
## $immune
##                                                          category
## 827  gse16450 immature vs mature neuron cell line 6h ifna stim dn
## 4353         gse5099 classical m1 vs alternative m2 macrophage dn
## 3753                       gse39820 ctrl vs il1b il6 cd4 tcell dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 827             3.999097e-06                0.9999999          4       20
## 4353            5.322847e-06                0.9999999          4       21
## 3753            5.694788e-06                0.9999998          5       48
##              FDR
## 827  0.009692529
## 4353 0.009692529
## 3753 0.009692529
## 
## [1] "set_40"
##  [1] "ANXA2R"  "ARL4A"   "COQ10B"  "COQ7"    "COQ8A"   "EFCAB2"  "EGR1"   
##  [8] "ODC1"    "SNHG12"  "TAGAP"   "TCP11L2" "TCTA"    "WSB1"    "APBB1IP"
## [15] "ARL4C"   "BUD23"   "CCDC43"  "UBE3B"  
## $go_bp
##                               category over_represented_pvalue
## 1257       ketone biosynthetic process            3.484787e-07
## 4687      ubiquinone metabolic process            5.953627e-06
## 3236         quinone metabolic process            2.038389e-05
## 395  cellular ketone metabolic process            7.898762e-05
##      under_represented_pvalue numDEInCat numInCat         FDR
## 1257                1.0000000          4        8 0.001666774
## 4687                1.0000000          3        5 0.014238099
## 3236                0.9999999          3        7 0.032498709
## 395                 0.9999975          4       29 0.094449448
saveRDS(goseq_res, sprintf("output/gene_set_enrichments_%s.RDS", 
                           file_tag))

Session information

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  8958285 478.5   16391124 875.4         NA 16391124 875.4
## Vcells 19171499 146.3   57912627 441.9      65536 77218972 589.2
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
##  [2] GenomicFeatures_1.50.4                  
##  [3] GenomicRanges_1.50.2                    
##  [4] GenomeInfoDb_1.34.9                     
##  [5] org.Hs.eg.db_3.16.0                     
##  [6] AnnotationDbi_1.60.2                    
##  [7] IRanges_2.32.0                          
##  [8] S4Vectors_0.36.2                        
##  [9] Biobase_2.58.0                          
## [10] BiocGenerics_0.44.0                     
## [11] goseq_1.50.0                            
## [12] geneLenDataBase_1.34.0                  
## [13] BiasedUrn_2.0.10                        
## [14] fgsea_1.24.0                            
## [15] biomaRt_2.54.1                          
## [16] limma_3.54.2                            
## [17] tidyr_1.3.0                             
## [18] ggpubr_0.6.0                            
## [19] ggplot2_3.4.2                           
## [20] data.table_1.14.8                       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-162                matrixStats_1.0.0          
##  [3] bitops_1.0-7                bit64_4.0.5                
##  [5] filelock_1.0.2              progress_1.2.2             
##  [7] httr_1.4.6                  tools_4.2.3                
##  [9] backports_1.4.1             bslib_0.4.2                
## [11] utf8_1.2.3                  R6_2.5.1                   
## [13] mgcv_1.8-42                 DBI_1.1.3                  
## [15] colorspace_2.1-0            withr_2.5.0                
## [17] tidyselect_1.2.0            prettyunits_1.1.1          
## [19] bit_4.0.5                   curl_5.0.1                 
## [21] compiler_4.2.3              cli_3.6.1                  
## [23] xml2_1.3.4                  DelayedArray_0.24.0        
## [25] rtracklayer_1.58.0          sass_0.4.5                 
## [27] scales_1.2.1                rappdirs_0.3.3             
## [29] Rsamtools_2.14.0            stringr_1.5.0              
## [31] digest_0.6.31               rmarkdown_2.21             
## [33] XVector_0.38.0              pkgconfig_2.0.3            
## [35] htmltools_0.5.5             MatrixGenerics_1.10.0      
## [37] dbplyr_2.3.2                fastmap_1.1.1              
## [39] rlang_1.1.0                 rstudioapi_0.14            
## [41] RSQLite_2.3.1               BiocIO_1.8.0               
## [43] jquerylib_0.1.4             generics_0.1.3             
## [45] jsonlite_1.8.4              BiocParallel_1.32.6        
## [47] dplyr_1.1.2                 car_3.1-2                  
## [49] RCurl_1.98-1.12             magrittr_2.0.3             
## [51] GO.db_3.16.0                GenomeInfoDbData_1.2.9     
## [53] Matrix_1.6-4                Rcpp_1.0.10                
## [55] munsell_0.5.0               fansi_1.0.4                
## [57] abind_1.4-5                 lifecycle_1.0.3            
## [59] stringi_1.7.12              yaml_2.3.7                 
## [61] carData_3.0-5               SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0             BiocFileCache_2.6.1        
## [65] grid_4.2.3                  blob_1.2.4                 
## [67] parallel_4.2.3              crayon_1.5.2               
## [69] lattice_0.20-45             splines_4.2.3              
## [71] Biostrings_2.66.0           cowplot_1.1.1              
## [73] hms_1.1.3                   KEGGREST_1.38.0            
## [75] knitr_1.44                  pillar_1.9.0               
## [77] rjson_0.2.21                ggsignif_0.6.4             
## [79] codetools_0.2-19            fastmatch_1.1-3            
## [81] XML_3.99-0.14               glue_1.6.2                 
## [83] evaluate_0.20               png_0.1-8                  
## [85] vctrs_0.6.2                 gtable_0.3.3               
## [87] purrr_1.0.1                 cachem_1.0.7               
## [89] xfun_0.39                   broom_1.0.4                
## [91] restfulr_0.0.15             rstatix_0.7.2              
## [93] tibble_3.2.1                GenomicAlignments_1.34.1   
## [95] memoise_2.0.1